import pandas as pd  
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder  
from sklearn.cluster import KMeans  
import pickle
from decision_company import read_csv_file, col_copy, create_standard_scaler, fit_transform_standard_scaler, create_label_encoder, fit_transform_label_encoder, get_dummies, create_kmeans, fit_predict_kmeans, fetch_column, check_elements_in_list, extract_unique_values, convert_np_to_list, col_assign_val, series_get_quantile

  
# Read the raw customer records from disk.
credit_customers = read_csv_file("credit_customers.csv")

# Work on a copy restricted to the columns used as clustering features.
important_columns = ['credit_history', 'age', 'employment', 'credit_amount', 'savings_status']
data_for_clustering = col_copy(credit_customers, important_columns)

# Label-encode the two categorical columns, each with its own fresh encoder,
# and write the encoded values back over the original column.
for categorical_column in ('savings_status', 'employment'):
    encoder = create_label_encoder()
    encoded_values = fit_transform_label_encoder(encoder, data_for_clustering[categorical_column])
    col_assign_val(data_for_clustering, categorical_column, encoded_values)

# One-hot encode 'credit_history' (drop_first=True is forwarded to get_dummies).
data_for_clustering = get_dummies(data_for_clustering, columns=['credit_history'], drop_first=True)

# Standard-scale the fully numeric feature table before clustering.
scaler = create_standard_scaler()
data_for_clustering_scaled = fit_transform_standard_scaler(scaler, data_for_clustering)
  
# Cluster the scaled features into four groups; random_state is pinned so
# repeated runs use the same seed.
kmeans_settings = {'n_clusters': 4, 'random_state': 42}
kmeans = create_kmeans(**kmeans_settings)
cluster_labels = fit_predict_kmeans(kmeans, data_for_clustering_scaled)

# Store each row's cluster label on the original (unencoded) dataset.
col_assign_val(credit_customers, 'cluster', cluster_labels)
  
# Look for further segments likely to respond to promotions and financing.
# A customer qualifies when both conditions hold:
#   1. credit_history is 'no credits/all paid'
#   2. credit_amount is strictly above the 0.75 quantile of all credit amounts
credit_history_column = fetch_column(credit_customers, 'credit_history')
good_credit_history = check_elements_in_list(credit_history_column, ['no credits/all paid'])

credit_amount_column = fetch_column(credit_customers, 'credit_amount')
upper_quartile_amount = series_get_quantile(credit_amount_column, q=0.75)
high_credit_amount = credit_amount_column > upper_quartile_amount

# Keep only the rows that satisfy both conditions.
qualifying_rows = good_credit_history & high_credit_amount
potential_customers = credit_customers[qualifying_rows]

# Collect the distinct cluster labels present among the qualifying customers.
qualifying_clusters = fetch_column(potential_customers, 'cluster')
additional_customer_segments = convert_np_to_list(extract_unique_values(qualifying_clusters))
  
# Drop the segments that were already selected as targets in the previous step,
# so only newly discovered segments remain.
target_customer_segments = [1, 2]  # Replace this list with the target customer segments from the previous step
additional_customer_segments = [
    segment
    for segment in additional_customer_segments
    if segment not in target_customer_segments
]

# Report the result and persist it for later comparison.
print("additional_customer_segments:\n", additional_customer_segments)

# Use a context manager so the output file is flushed and closed deterministically;
# the original passed a bare open() handle to pickle.dump and never closed it.
with open("./ref_result/additional_customer_segments.pkl", "wb") as result_file:
    pickle.dump(additional_customer_segments, result_file)